# Updated by Wei 20140613 at school
# Updated by Wei 20140518 at school
# Build the probability table by dividing each numerator-table entry by the matching denominator-table entry.
print "Program Begins..."
# Load the denominator table
# key: classLabelBasedOnListLength
# values: in the list format
denominatorValueDict = {}
# on dodo:
# inputFileName2 = "/home/diaosi/gov2ClearYourMindAndDoItAgain/relRankFilesRelated/relRank_20140118Night_Denominator_stepGap_one3rd.csv"
# on moa:
# inputFileName2 = "/home/diaosi/workspace/web-search-engine-wei-2014-April/data/secondFactorProbability/denominatorTable_fromTOP100Postings_20140518.csv"
# on vidaserver1
inputFileName2 = "/local_scratch/wei/workspace/NYU_IRTK/data/secondFactorProbability/clueweb09B_denominator_table_20140613.csv"
inputFileHandler = open(inputFileName2,"r")
for line in inputFileHandler.readlines():
    lineElements = line.strip().split(" ")
    currentKey = lineElements[0]

    if currentKey not in denominatorValueDict:
        denominatorValueDict[currentKey] = []

    for value in lineElements[1:]:
        denominatorValueDict[currentKey].append(float(value))

print "len(denominatorValueDict): ",len(denominatorValueDict)
print "denominatorValueDict['1']: ",denominatorValueDict['1']
inputFileHandler.close()

# on dodo:
# outputFileName = "/home/diaosi/gov2ClearYourMindAndDoItAgain/relRankFilesRelated/relRank_20140119Afternoon_Probability.csv"
# on moa:
# outputFileName = "/home/diaosi/workspace/web-search-engine-wei-2014-April/data/secondFactorProbability/probabilityTable_fromTOP100Postings_20140518.csv"
# on vidaserver1
outputFileName = "/local_scratch/wei/workspace/NYU_IRTK/data/secondFactorProbability/clueweb09B_probability_table_20140613.csv"
outputFileHandler = open(outputFileName,"w")

# key: classLabelBasedOnListLength
# values: in the list format
numeratorValueDict = {}
# on dodo:
# inputFileName1 = "/home/diaosi/gov2ClearYourMindAndDoItAgain/relRankFilesRelated/relRank_20140119Afternoon_Numerator_stepGap_one3rd.csv"
# on moa:
# inputFileName1 = "/home/diaosi/workspace/web-search-engine-wei-2014-April/data/secondFactorProbability/numeratorTable_fromTOP100Postings_20140518.csv"
# on vidaserver1
inputFileName1 = "/local_scratch/wei/workspace/NYU_IRTK/data/secondFactorProbability/clueweb09B_numerator_table_20140613.csv"
inputFileHandler = open(inputFileName1,"r")
for line in inputFileHandler.readlines():
    lineElements = line.strip().split(" ")
    currentKey = lineElements[0]
    #currentClassLowerBoundBasedOnListLength = int(lineElements[1])
    #currentNumOfTermsBelongingToFromNumerator = int(lineElements[2])
    #currentNumOfRanges = int(lineElements[3])
    
    # Current version
    outputLine = lineElements[0] + " "
    
    # OLD version
    # outputLine = lineElements[0] + " " + lineElements[1] + " " + lineElements[2] + " " + lineElements[3] + " "
    
    
    if currentKey not in numeratorValueDict:
        numeratorValueDict[currentKey] = []

    for index,value in enumerate( lineElements[1:] ):
        numeratorValueDict[currentKey].append(float(value))
        outputLine += str( float(value) / denominatorValueDict[currentKey][index] ) + " "

    outputLine = outputLine.strip() + "\n"
    outputFileHandler.write(outputLine)

print "len(numeratorValueDict): ",len(numeratorValueDict)
print "numeratorValueDict['1']: ",numeratorValueDict['1']
inputFileHandler.close()
outputFileHandler.close()
print "Overall Processing Statistics:"
print "inputFileName1: ",inputFileName1
print "inputFileName2: ",inputFileName2
print "outputFileName: ",outputFileName
print "Program Ends."
